### Code for: Spatial-temporal clustering analysis of yaws on Lihir Island, Papua New Guinea
### to enhance planning and implementation of eradication programs
### Eric Q. Mooring, Oriol Mitja, and Megan B. Murray, 2018
### Code 3 of 3
### Permutation-based approach for assessing clustering of yaws cases within primary school attendance areas ###

# Ensure that the package "combinat" is installed.

library(combinat)

### Load data classifying each village by primary school attendance area
### Villages are sorted in geographic order, clockwise starting with Londolovit

SchoolAnalysisData<-read.csv(file = "LihirVillagesPrimarySchools.csv")

### Load and tabulate yaws case data

YawsCases<-read.csv(file = "YawsCases_LMC.csv")

YawsCases8to14<-YawsCases[YawsCases$Age8to14 == 1,]
nrow(YawsCases8to14) # there are 643 yaws cases in this analysis

YawsCases8to14byVill<-as.data.frame(table(YawsCases8to14$Location))
colnames(YawsCases8to14byVill)<-c("Village","Yaws8to14.AllYears")
SchoolAnalysisData<-merge(x=SchoolAnalysisData,y=YawsCases8to14byVill,by.x="Village",by.y="Village",sort = F) # merge data sets

YawsCases8to14byVillbyYear<-as.data.frame.matrix(table(YawsCases8to14$Location,YawsCases8to14$Year))
colnames(YawsCases8to14byVillbyYear)<-c("Yaws8to14.2005","Yaws8to14.2006","Yaws8to14.2007","Yaws8to14.2008",
                                        "Yaws8to14.2009","Yaws8to14.2010","Yaws8to14.2011","Yaws8to14.2012",
                                        "Yaws8to14.2013","Yaws8to14.2014","Yaws8to14.2015","Yaws8to14.2016")
YawsCases8to14byVillbyYear$Village<-rownames(YawsCases8to14byVillbyYear)
SchoolAnalysisData<-merge(x=SchoolAnalysisData,y=YawsCases8to14byVillbyYear,by.x="Village",by.y="Village",sort = F) # merge data sets


### Load and tabulate all outpatient data

AllVisits<-read.csv(file = "AllOutpatient_LMC.csv")

AllVisits8to14<-AllVisits[AllVisits$Age8to14 == 1,]
nrow(AllVisits8to14) # there are 32202 outpatient visits in this analysis

AllVisits8to14byVill<-as.data.frame(table(AllVisits8to14$Location))
colnames(AllVisits8to14byVill)<-c("Village","AllVisits8to14.AllYears")
SchoolAnalysisData<-merge(x=SchoolAnalysisData,y=AllVisits8to14byVill,by.x="Village",by.y="Village",sort = F) # merge data sets

AllVisits8to14byVillbyYear<-as.data.frame.matrix(table(AllVisits8to14$Location,AllVisits8to14$Year))
colnames(AllVisits8to14byVillbyYear)<-c("AllVisits8to14.2005","AllVisits8to14.2006","AllVisits8to14.2007",
                                        "AllVisits8to14.2008","AllVisits8to14.2009","AllVisits8to14.2010",
                                        "AllVisits8to14.2011","AllVisits8to14.2012","AllVisits8to14.2013",
                                        "AllVisits8to14.2014","AllVisits8to14.2015","AllVisits8to14.2016")
AllVisits8to14byVillbyYear$Village<-rownames(AllVisits8to14byVillbyYear)
SchoolAnalysisData<-merge(x=SchoolAnalysisData,y=AllVisits8to14byVillbyYear,by.x="Village",by.y="Village",sort = F) # merge data sets

# (To make primary school analysis plot, run code at least to this point)

### School analysis data has been maintained in geographic order (clockwise from Londolovit)

### Make all permuted school assignments

permutations<-permn(list(rep("A",times=6),rep("B",times=4),rep("B",times=4),rep("B",times=4),rep("B",times=4),rep("C",times=3),rep("D",times=1)))
permutations<-permutations[!duplicated(permutations)]
permutations<-matrix(unlist(permutations),nrow=26,ncol=210,byrow=F)
for(col in 1:ncol(permutations)){permutations[permutations[,col]=="B",col]<-c(rep("B1",times=4),rep("B2",times=4),rep("B3",times=4),rep("B4",times=4))}

num.villages<-26
num.permutations<-ncol(permutations)
SimPrimarySchoolAttendanceAreaMatrix<-matrix(nrow=num.villages,ncol=num.permutations*num.villages)
SimPrimarySchoolAttendanceAreaMatrix[,1:num.permutations]<-permutations
for(vill in 2:num.villages){SimPrimarySchoolAttendanceAreaMatrix[,(num.permutations*(vill-1)+1):(num.permutations*vill)]<-permutations[c(vill:num.villages,1:(vill-1)),]}

### Define the function that will run the analysis

school.clustering.func<-function(numerator,denominator,obs.group,permute.group){
  outcome<-numerator/denominator
  inv.var.n<- denominator+4  
  inv.var.p<- (numerator+2)/inv.var.n
  inv.var.q<- 1 - inv.var.p
  inv.var.wgts<-inv.var.n/(inv.var.p*inv.var.q)
  Obs.F.Stat<-anova(lm(formula=outcome~as.factor(obs.group),weights=inv.var.wgts))$"F value"[1]
  total.perms<-ncol(permute.group)
  Permute.F.Stats<-vector(length=total.perms)
  for(permutation in 1:total.perms){
    Permute.F.Stats[permutation]<-anova(lm(formula=outcome~as.factor(permute.group[,permutation]),weights=inv.var.wgts))$"F value"[1]}
  Exact.p.value<-sum(Permute.F.Stats>=Obs.F.Stat)/total.perms
  return(list(Obs.F.Stat,Permute.F.Stats,Exact.p.value))
}

### Run the analysis for all years and then for each year 2006 to 2015 individually

Result.8to14.AllYears<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.AllYears,
                                              denominator = SchoolAnalysisData$AllVisits8to14.AllYears,
                                              obs.group = SchoolAnalysisData$PrimarySchool,
                                              permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2006<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2006,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2006,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2007<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2007,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2007,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2008<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2008,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2008,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2009<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2009,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2009,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2010<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2010,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2010,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2011<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2011,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2011,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2012<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2012,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2012,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2013<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2013,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2013,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2014<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2014,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2014,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

Result.8to14.2015<-school.clustering.func(numerator = SchoolAnalysisData$Yaws8to14.2015,
                                          denominator = SchoolAnalysisData$AllVisits8to14.2015,
                                          obs.group = SchoolAnalysisData$PrimarySchool,
                                          permute.group = SimPrimarySchoolAttendanceAreaMatrix)

### Get the p-values for each analysis

Result.8to14.AllYears[[3]]

c(Result.8to14.2006[[3]],Result.8to14.2007[[3]],Result.8to14.2008[[3]],Result.8to14.2009[[3]],Result.8to14.2010[[3]],
  Result.8to14.2011[[3]],Result.8to14.2012[[3]],Result.8to14.2013[[3]],Result.8to14.2014[[3]],Result.8to14.2015[[3]])

